bitkeeper revision 1.122.5.1 (3e71d6fe7FguR-sT8s7ha1pGTKuYSA)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Fri, 14 Mar 2003 13:19:58 +0000 (13:19 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Fri, 14 Mar 2003 13:19:58 +0000 (13:19 +0000)
Many files:
  Sort out interrupt distribution in SMP systems. We now periodically redistribute towards the most idle processors. There's more sport to be had here though...

xen/arch/i386/io_apic.c
xen/arch/i386/irq.c
xen/arch/i386/process.c
xen/arch/i386/setup.c
xen/arch/i386/smpboot.c
xen/common/schedule.c
xen/include/asm-i386/hardirq.h
xen/include/asm-i386/smpboot.h
xen/include/xeno/sched.h

index 6ad37f2399b45fe92e7f68658bdc00806dba3483..fbea77e6467ca982c1b50972f026b0c4b3291311 100644 (file)
@@ -189,6 +189,86 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+       unsigned long flags;
+
+       /*
+        * Only the first 8 bits are valid.
+        */
+       mask = mask << 24;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __DO_ACTION(1, = mask, )
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if CONFIG_SMP
+
+typedef struct {
+       unsigned int cpu;
+       unsigned long timestamp;
+} ____cacheline_aligned irq_balance_t;
+
+static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned
+                       = { [ 0 ... NR_IRQS-1 ] = { 0, 0 } };
+
+extern unsigned long irq_affinity [NR_IRQS];
+
+#endif
+
+#define IDLE_ENOUGH(cpu,now) \
+               (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu,allowed_mask) \
+               ((1 << cpu) & (allowed_mask))
+
+static unsigned long move(int curr_cpu, unsigned long allowed_mask, unsigned long now, int direction)
+{
+       int search_idle = 1;
+       int cpu = curr_cpu;
+
+       goto inside;
+
+       do {
+               if (unlikely(cpu == curr_cpu))
+                       search_idle = 0;
+inside:
+               if (direction == 1) {
+                       cpu++;
+                       if (cpu >= smp_num_cpus)
+                               cpu = 0;
+               } else {
+                       cpu--;
+                       if (cpu == -1)
+                               cpu = smp_num_cpus-1;
+               }
+       } while (!IRQ_ALLOWED(cpu,allowed_mask) ||
+                       (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+       return cpu;
+}
+
+static inline void balance_irq(int irq)
+{
+#if CONFIG_SMP
+       irq_balance_t *entry = irq_balance + irq;
+       unsigned long now = jiffies;
+
+       if (unlikely(entry->timestamp != now)) {
+               unsigned long allowed_mask;
+               int random_number;
+
+               rdtscl(random_number);
+               random_number &= 1;
+
+               allowed_mask = cpu_online_map & irq_affinity[irq];
+               entry->timestamp = now;
+               entry->cpu = move(entry->cpu, allowed_mask, now, random_number);
+               set_ioapic_affinity(irq, apicid_to_phys_cpu_present(entry->cpu));
+       }
+#endif
+}
+
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  * specific CPU-side IRQs.
@@ -1233,6 +1313,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
+       balance_irq(irq);
        if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
                                        == (IRQ_PENDING | IRQ_DISABLED))
                mask_IO_APIC_irq(irq);
@@ -1272,6 +1353,8 @@ static void end_level_ioapic_irq (unsigned int irq)
        unsigned long v;
        int i;
 
+       balance_irq(irq);
+
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1328,19 +1411,6 @@ static void end_level_ioapic_irq (unsigned int irq)
 
 static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
 
-static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
-{
-       unsigned long flags;
-       /*
-        * Only the first 8 bits are valid.
-        */
-       mask = mask << 24;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __DO_ACTION(1, = mask, )
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
index 312cfe797067088f27ae862130912985837c3fbf..e799542b1b774debf0cb2a55243951eaa1655966 100644 (file)
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
 { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
 
+#ifdef CONFIG_SMP
+/* NB. XXX We'll want some way of fiddling with this from DOM0. */
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+#endif
+
 /*
  * Special irq handlers.
  */
index 3c048d72bfbff4ac4c538feb90320e1c664e069b..c9736a2093f535e4d477d39c54790b3da840bb72 100644 (file)
@@ -85,6 +85,7 @@ void cpu_idle (void)
 
     for ( ; ; )
     {
+        irq_stat[cpu].idle_timestamp = jiffies;
         while (!current->hyp_events && !softirq_pending(cpu))
             default_idle();
         do_hyp_events();
index 1e5f35a73e44325a4136ce6863bd3337f4cf74e3..6b2c380adcc7b7ae46393ea6a15a0522657a9dff 100644 (file)
@@ -20,6 +20,7 @@ unsigned long wait_init_idle;
 
 /* Basic page table for each CPU in the system. */
 l2_pgentry_t *idle_pg_table[NR_CPUS] = { idle0_pg_table };
+struct task_struct *idle_task[NR_CPUS] = { &idle0_task };
 
 /* for asm/domain_page.h, map_domain_page() */
 unsigned long *mapcache[NR_CPUS];
index 0955db82f33b7021856821983abd475c5d30a033..401b8f80206e305586591273ddff697a42aa35ff 100644 (file)
@@ -699,6 +699,8 @@ static void __init do_boot_cpu (int apicid)
 
     SET_DEFAULT_FAST_TRAP(&idle->thread);
 
+    idle_task[cpu] = idle;
+
     /* start_eip had better be page-aligned! */
     start_eip = setup_trampoline();
 
index 787b43d900b38b07b362d06ad4cec5d288a45dd7..1c8d751e4de602a59750fc7150cc4280c2200bf5 100644 (file)
@@ -174,6 +174,7 @@ long schedule_timeout(long timeout)
 }
 
 /* RN: XXX turn this into do_halt() */
+/* KAF: No, turn it back into do_yield()! */
 /*
  * yield the current process
  */
@@ -281,6 +282,15 @@ asmlinkage void schedule(void)
     return;
 }
 
+
+/* No locking needed -- pointer comparison is safe :-) */
+int idle_cpu(int cpu)
+{
+    struct task_struct *p = schedule_data[cpu].curr;
+    return p == idle_task[cpu];
+}
+
+
 /*
  * The scheduling timer.
  */
index bad529b882b68b8b230c54f5830ac08ba2626d7c..f0a9024dcd72f27a6d69f92ab23f0eda07bf7a26 100644 (file)
@@ -10,6 +10,7 @@ typedef struct {
        unsigned int __local_irq_count;
        unsigned int __local_bh_count;
        unsigned int __syscall_count;
+       unsigned long idle_timestamp;
 } ____cacheline_aligned irq_cpustat_t;
 
 #include <xeno/irq_cpustat.h>  /* Standard mappings for irq_cpustat_t above */
index 3ca484d53108a0306f3b8b6181695648c647c9d4..4017902c6942fad9390975615699cfd611465012 100644 (file)
@@ -30,6 +30,15 @@ static inline void detect_clustered_apic(char* oem, char* prod)
                /*Start cyclone clock*/
                cyclone_setup(0);
        }
+       else if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "RUTHLESS SMP", 9)){
+               clustered_apic_mode = CLUSTERED_APIC_XAPIC;
+               apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
+               int_dest_addr_mode = APIC_DEST_PHYSICAL;
+               int_delivery_mode = dest_Fixed;
+               esr_disable = 1;
+               /*Start cyclone clock*/
+               cyclone_setup(0);
+       }
        else if (!strncmp(oem, "IBM NUMA", 8)){
                clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
                apic_broadcast_id = APIC_BROADCAST_ID_APIC;
@@ -116,15 +125,6 @@ static inline int target_cpus(void)
        return cpu_online_map;
 }
 #else
-/* KAF Xen: Round-robin allocate IRQs to CPUs. */
-static inline int target_cpus(void)
-{
-    static unsigned int cpu_field = 1;
-    do { 
-        cpu_field <<= 1; 
-        if ( cpu_field == 0x100 ) cpu_field = 1; /* logical field == 8 bits */ 
-    } while ( (cpu_field & cpu_online_map) == 0 );
-    return cpu_field;
-}
+#define target_cpus() (0xFF)
 #endif
 #endif
index 6d1842a2ea9ecfa73f13a046d07427710756f0a8..49fca609ec55dea76b8f3c3ca496497cb94e61bc 100644 (file)
@@ -149,6 +149,7 @@ struct task_struct {
     next_task:   &(_t)           \
 }
 
+extern struct task_struct *idle_task[NR_CPUS];
 #define IDLE_DOMAIN_ID   (~0)
 #define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID)
 
@@ -214,7 +215,8 @@ asmlinkage void schedule(void);
 
 void domain_init(void);
 
-void cpu_idle(void);
+int idle_cpu(int cpu); /* Is CPU 'cpu' idle right now? */
+void cpu_idle(void);   /* Idle loop. */
 
 #define REMOVE_LINKS(p) do { \
         (p)->next_task->prev_task = (p)->prev_task; \